
%  =======================================================================================================================================================
% 
%  Code Description: 
%  This codefile builds the IFS-Base dataset from the raw data provided by the Bundesbank's  Research Data and Service Centre (RDSC)
% 
%  =======================================================================================================================================================
% 
% 
%  General disclaimer:
%  This file directory produces replication code for "Connected Funds". 
%  Because we cannot share the underlying data provided by the Bundesbank's Research Data and Service Centre (RDSC) and other subscription data sources, 
%  we have included pseudo data to show how the raw data are formatted. 
%  Other researchers can go through a similar approval and subscription process to obtain the underlying data. (2023-04-06)
% 
%  =======================================================================================================================================================


clear all;
close all;

projectPath = 'C:\ConnectedFunds_Codebase\'

% Add functions folders
addpath(strcat(projectPath, 'Code\matlab_functions'));


% Load and clean IFS data
load(strcat(projectPath, 'Data\IFS\IFS_All'), 'IFS_All', 'ufund', 'umonth');

%%%%%%%%%%%%%%%%%%%  Fund type

IFS_All.FCAT(IFS_All.FCAT == 8) = 3;

[~,idxWhereISIN] = ismember(IFS_All.ISIN,ufund);

IFS_All.artmittel_clean = NaN(height(IFS_All),1);
for i = 1:length(ufund)

   x = IFS_All.FCAT(idxWhereISIN==i);
   
   % Use latest reported FCAT (which we call "artmittel_clean")
   IFS_All.artmittel_clean(idxWhereISIN==i) = x(end); 

end
clear i x;

%%%%%%%%%%%%%%%%%%%  UCITSAIF

IFS_All.SPEZIAL = zeros(height(IFS_All),1);
IFS_All.SPEZIAL(IFS_All.UCITSAIF>=200) = 1;

%%%%%%%%%%%%%%%%%%% identify ETFs


%% step 1: identify etfs as those funds that are -simultaneously- exchange-traded and index funds at some point in time

etf = startsWith(IFS_All.ETFF,'1');
sum(etf)
indexfonds = startsWith(IFS_All.INDXF,'1');
etf = max(etf,indexfonds);

sum(etf),


%% step 2: label those funds consistently as etfs

tt = find(etf==1);
etf_id = unique(IFS_All.ISIN(tt));
[~,idxWhereETF] = ismember(IFS_All.ISIN,etf_id);
etf(idxWhereETF>0) = 1;

IFS_All.etf = etf;

clear tt etf*;

%%%%%%% generate fund returns and flows (Note: returns are calculated
%%%%%%% based on rueckn_preis - this is motivated by the fact that this is
%%%%%%% the most relevant price indicator for investors, i.e., what would
%%%%%%% I get by redeeming my fund share?
 
IFS_All.Return     = NaN*IFS_All.DATUM;
IFS_All.ReturnDiv  = NaN*IFS_All.DATUM;
IFS_All.Netflows   = NaN*IFS_All.DATUM;

[~,idxWhereISIN] = ismember(IFS_All.ISIN,ufund);
[~,idxWhereDATUM] = ismember(IFS_All.DATUM,umonth);

for i = 1:length(ufund)

   i,
   tt = find(idxWhereISIN==i);
   
   if length(tt)>1
       x  = [NaN; (IFS_All.RUECKN_PREIS(tt(2:end))-IFS_All.RUECKN_PREIS(tt(1:end-1)))./IFS_All.RUECKN_PREIS(tt(1:end-1))]; 
       x=   [idxWhereDATUM(tt) x];
       x2 = [NaN; ((IFS_All.RUECKN_PREIS(tt(2:end))+IFS_All.AUSSCHUETTUNG(tt(2:end))./IFS_All.UMLAUF_STUECK(tt(1:end-1)))-IFS_All.RUECKN_PREIS(tt(1:end-1)))./IFS_All.RUECKN_PREIS(tt(1:end-1))]; 
       x2 = [idxWhereDATUM(tt) x2];
      
       y  = [NaN;(IFS_All.MITTELZUFL(tt(2:end))-IFS_All.MITTELABFL(tt(2:end)))./IFS_All.FONDSVERM(tt(1:end-1))]; 
       y  = [idxWhereDATUM(tt) y];
       
       k = find(isinf(x(:,2))==1);
       x(k,2) = NaN;
       
       k = find(isinf(x2(:,2))==1);
       x2(k,2) = NaN;
       
       k = find(isinf(y(:,2))==1);
       y(k,2) = NaN;
       
       if max(diff(idxWhereDATUM(tt)))>1 % only use subsequent months!
            tau = find(diff(idxWhereDATUM(tt))>1) + 1;
            x(tau,2) = NaN;
            x2(tau,2) = NaN;
            y(tau,2) = NaN;
       end

       IFS_All.Return(tt)    = x(:,2); 
       IFS_All.ReturnDiv(tt) = x2(:,2);
       IFS_All.Netflows(tt)  = y(:,2);
   end

   clear y x x2 k tt tau;

end

keep IFS_All ufund umonth;

IFS_All.ISIN = cellstr(IFS_All.ISIN);
ufund = cellstr(ufund);

%%% calculate fund family tna based on public ECB IF list (Source:
% https://www.ecb.europa.eu/stats/pdf/money/IF_overview_2019_2022.zip?8129f0c00738512a881a1ea4d9bfae36
% https://www.ecb.europa.eu/stats/pdf/money/IF_overview_2014_2018.zip?1adedbad1d60807ba283ba7b034fb643
% https://www.ecb.europa.eu/stats/pdf/money/IF_overview_2009_2013.zip?d4f27c26379f82f2c57cda904b9dbe36
%)

IFS_All.KAG_ManagementCompanyName = cell(height(IFS_All),1);
IFS_All.Name = cell(height(IFS_All),1);

for t = 1:length(umonth)

    t, 
    
    year  = floor(umonth(t)/100);
    month = umonth(t) - (100*year);
    quarter = ceil(month/3);
    
    if 10*year+quarter<=20201
    load(strcat(projectPath, 'Data\IFS\ECB_IF_List\mat\IF_', num2str(year), '_Q' , num2str(quarter)), 'IF_Data');
    else
    load(strcat(projectPath, 'Data\IFS\ECB_IF_List\mat\IF_', num2str(year), '_Q1'), 'IF_Data');    
    end
    
    tt = find(IFS_All.DATUM==umonth(t));
    [~,idxWhereISIN] = ismember(IFS_All.ISIN(tt),IF_Data.ISIN);
    IFS_All.KAG_ManagementCompanyName(tt(idxWhereISIN>0)) = IF_Data.ManagementCompanyName(idxWhereISIN(idxWhereISIN>0));
    if sum(strcmp(IF_Data.Properties.VariableNames,'NameOfTheFund'))>0
    IFS_All.Name(tt(idxWhereISIN>0)) = IF_Data.NameOfTheFund(idxWhereISIN(idxWhereISIN>0));
    elseif sum(strcmp(IF_Data.Properties.VariableNames,'OrgunitName'))>0
    IFS_All.Name(tt(idxWhereISIN>0)) = IF_Data.OrgunitName(idxWhereISIN(idxWhereISIN>0));  
    elseif sum(strcmp(IF_Data.Properties.VariableNames,'Name'))>0
    IFS_All.Name(tt(idxWhereISIN>0)) = IF_Data.Name(idxWhereISIN(idxWhereISIN>0)); 
    end
    
    clear tt i* familyTNA year month quarter IF_Data ufamily;
end
clear t;

% calculate family tna (interpolate missing asset management company names first)
[~,idxWhereISIN] = ismember(IFS_All.ISIN,ufund);

for i = 1:length(ufund)

    i,
    
    ii = find(idxWhereISIN==i);
    x = IFS_All.KAG_ManagementCompanyName(ii);
    if sum(cellfun(@isempty,x))<length(ii)
        name = x;
        name(cellfun(@isempty,name)) =  [];
        name = name(end);
        ii = ii(cellfun(@isempty,x));
        if isempty(name)==0 & isempty(ii)==0
           IFS_All.KAG_ManagementCompanyName(ii) = name; 
        end
    end

    clear ii x name;

end

IFS_All.KAG_TNA = NaN(height(IFS_All),1);

ufamily = uniquecell(IFS_All.KAG_ManagementCompanyName);
ufamily(cellfun(@isempty,ufamily))=[];

[~,idxWhereDATUM]  = ismember(IFS_All.DATUM,umonth);

for t = 1:length(umonth)

    t,
    for i = 1:length(ufamily)
        ii = find(idxWhereDATUM==t & strcmp(IFS_All.KAG_ManagementCompanyName,ufamily(i))==1); % ==i);
        IFS_All.KAG_TNA(ii) = nansum(IFS_All.FONDSVERM(ii));
        clear ii;
    end

end

clear i* ufamily t* ans;

save(strcat(projectPath, 'Data\IFS\IFS_All_CleanData'));